#### "Gender Inequality and Authoritarian Regimes" ####
# authors: "Lars Pelke"
# date: 2020-01-20

# clear workspace
rm(list=ls())

#libraries
library(tidyverse)
library(ggpubr)
library(panelr)
library(readstata13)
library(texreg)
library(dotwhisker)
library(broom.mixed)
library(countrycode)

library(lme4)
library(sjPlot)
library(sjmisc)
library(lfe)

library(MCMCpack)

# set working directory

#### Import datasets ####

##Import VDEM DATA
vdem <- read_csv("calculations/data/rawdata/vdem_9/V-Dem-CY-Full+Others-v9.csv") 
vdem <- vdem %>%
  rename(cown = COWcode)

gwf <- read.dta13("calculations/data/rawdata/GWF How Dictatorships Work/gwf_AllPoliticalRegimes.dta")
gwf <- gwf %>%
  rename(cown = cowcode) # rename country id variable

## Import EPR data ##

eprlong <- read.csv("calculations/data/rawdata/epr/EPR-2018.1.1.csv", as.is = T) %>%
  rename(country = statename) %>%
  arrange(country)

# compare country names in EPR to dataset
setdiff(ds.fullraw$country, eprlong$country)

# recode differing country names
eprlong$country[eprlong$country == "Czech Republic"] <- "Czechia"
eprlong$country[eprlong$country == "Republic of Korea"] <- "South Korea"
eprlong$country[eprlong$country == "Serbia" | 
                  eprlong$country == "Serbia and Montenegro"] <- "Yugoslavia/Serbia"

# transform into country-year data
eprlongcy <- eprlong %>% 
  rowwise() %>%
  do(data.frame(country = .$country,
                from = .$from,
                to = .$to,
                year = seq(.$from, .$to, by = 1),
                group = .$group,
                gwgroupid =.$gwgroupid,
                umbrella = .$umbrella,
                size = .$size,
                status = .$status)) %>%
  arrange(country, year)

# reduce dataset
epr <- eprlongcy %>%
  filter(year >= 1964)

# add country-year variable
epr$countryyear <- paste(epr$country, epr$year)

# create exclusion dummy variable
epr <- epr %>%
  mutate(exclusion = if_else(status == "DISCRIMINATED"  | 
                               status == "POWERLESS" |
                               status == "SELF-EXCLUSION", 1, 0))

# create exclusion ratio variable
epr <- epr %>%
  group_by(country, year) %>%
  summarise(eprratio = sum(size[exclusion == 1]))

epr$cown <- countrycode(epr$country, "country.name", "cown", warn = TRUE)

epr <- epr %>%
  ungroup()%>%
  select(cown, year, eprratio)

#### Create final basic dataset ####

vdem <- vdem %>%
  left_join(gwf, c("cown", "year"))

vdem <- vdem %>%
  left_join(epr, by = c("cown", "year"))
rm(epr, eprlong, eprlongcy)

#### Introduction examples ####

#### Women's political exclusion 1900 by regime types ####

vdem1900 <- vdem %>%
  filter(year==1900) %>%
  mutate(reg_type = case_when(v2x_regime<2 ~ "autocratic", 
                              v2x_regime>=2 ~ "democratic")) %>%
  group_by(reg_type) %>%
  summarize(mean_gender = mean(v2xpe_exlgender,na.rm=TRUE))

#### women*s political exclusion 2018 by regime types ####

vdem2018 <- vdem %>%
  filter(year==2018) %>%
  mutate(reg_type = case_when(v2x_regime<2 ~ "autocratic", 
                              v2x_regime>=2 ~ "democratic")) %>%
  group_by(reg_type) %>%
  summarize(mean_gender = mean(v2xpe_exlgender,na.rm=TRUE))

vdem2018 <- vdem %>%
  filter(year==2018) %>%
  mutate(reg_type = case_when(v2x_regime<2 ~ "autocratic", 
                              v2x_regime>=2 ~ "democratic")) %>%
  select(country_name, year, v2xpe_exlgender, reg_type) %>%
  filter(reg_type == "autocratic")

### Figure 1 Main paper####

vdem_fig1 <- vdem %>%
  mutate(reg_type = case_when(v2x_regime<2 ~ "autocratic", 
                              v2x_regime>=2 ~ "democratic")) %>%
  filter(year>=1900) %>%
  group_by(reg_type, year) %>%
  summarize(mean_gender = mean(v2xpe_exlgender,na.rm=TRUE)) %>%
  drop_na(mean_gender, reg_type)

ggplot(vdem_fig1, aes(x = year, y = mean_gender, linetype = reg_type)) +
  geom_line() +
  labs(linetype = "Regime Type", 
       x = "Year", 
       y = "Mean Gender Political Exclusion") +
  theme_pubr()

ggsave("calculations/Output/Figure_1_new.pdf", dpi = 1200)

#### Filter authoritarian regimes based on Geddes et al. 2014 ####

vdem <- vdem %>%
  filter(year>=1945) %>%
  filter(gwf_nonautocracy!="democracy")

#### Analysis of (political) exclusion by gender ####

#### Building and reshaping Variables ####

vdem <- vdem %>%
  dplyr::select(cown, country_name, country_id, year, starts_with("gwf"), eprratio, e_mipopula, e_wb_pop, 
         e_migdppcln, starts_with("e_total"), v2lgfemleg , v2elfemrst, e_pefeliex, v2lgqugen, v2elloelsy, 
         v2elparlel, starts_with("v2exl_legitideolcr"), v2exl_legitideol, v2exl_legitlead, v2exl_legitperf, 
         v2exl_legitratio, v2pepwrgen, v2peasjgen, v2peasbgen, v2xpe_exlgender, starts_with("v2elmulpar"), 
         v2xps_party, v2psorgs, v2psprbrch, v2psprlnks, v2psplats, v2pscohesv, v2x_regime, v2clgencl, 
         v2peapsgen,v2peasjgen, v2peasbgen)

## generate variables ##

vdem <- vdem %>%
  mutate(e_wb_pop_ln = log10(e_wb_pop))

vdem <- vdem %>% 
  mutate(v2lgqugen = ifelse(v2lgqugen>=1, 1, 0))

vdem <- vdem %>%
  mutate(gwf_type = ifelse(gwf_military==1, "military", 
                           ifelse(gwf_party==1, "party", 
                                  ifelse(gwf_monarchy==1, "monarchy", 
                                         ifelse(gwf_personal ==1, "personal", NA)))))

summary(vdem$gwf_type) 

## Iploate Multiparty Election Variable ##

vdem <- vdem %>%
  group_by(cown) %>%
  fill(v2elmulpar)

## Rescale Natural Resource Variable ##

vdem <- vdem %>%
  mutate(e_total_resources_income_pc = e_total_resources_income_pc/1000)

summary(vdem$e_total_resources_income_pc)

#### Bayesian Factor Analysis for Input and Output Dimension of Gender Inclusion #####

#gender exclusion index
summary(vdem$v2xpe_exlgender)

## Input Dimension ##
# power distributed by gender: v2pepwgen
# equality in respect for civil liberties by gender: v2clgencl

posterior_vdem_input <- MCMCfactanal(~v2pepwrgen + v2clgencl, factors=1,
                                      verbose=0, store.scores=FALSE, a0=1, b0=0.15,
                                      data=vdem, burnin=1000, mcmc=20000, thin=1)

## First, the loadings (Lambda). ##

loadings.input <- summary(posterior_vdem_input)$statistics[1:2]
loadings.input
ci.loadings.input <- summary(posterior_vdem_input)$quantiles[1:2,c(1,5)]
ci.loadings.input

## Second, Uniqueness ##
uniquenesses.input <- data.frame(summary(posterior_vdem_input)$statistics[3:4])
names(uniquenesses.input)[1] <- "uniquenesses"
uniquenesses.input

## extract the point estimates from the Bayesian factor analysis

posterior_vdem_input <- MCMCfactanal(~v2pepwrgen + v2clgencl, factors=1,
                                     verbose=0, store.scores=TRUE, a0=1, b0=0.15,
                                     data=vdem, burnin=1000, mcmc=20000, thin=1)

loadings.input.df <- summary(posterior_vdem_input)$statistics

loadings.input.df <- loadings.input.df[5:4327,]

loadings.input.df <- as_tibble(loadings.input.df) %>%
  dplyr::select(Mean, SD) %>%
  rename(gender_inclusion_input_mean = Mean, 
         gender_inclusion_input_sd = SD)

vdem_input <- vdem %>%
  drop_na(v2pepwrgen, v2clgencl) %>%
  dplyr::select(country_name, year, cown, v2pepwrgen, v2clgencl,v2xpe_exlgender)

vdem_input <- vdem_input %>%
  bind_cols(loadings.input.df)

## Output Dimension##
# access to public serives by gender (v2peapsgen)
# access to state jobs by gender (v2peasjgen)
# access to state business opportunities by gender (v2peasbgen)

posterior_vdem_output <- MCMCfactanal(~v2peapsgen + v2peasjgen + v2peasbgen, factors=1,
                                      verbose=0, store.scores=FALSE, a0=1, b0=0.15,
                                      data=vdem, burnin=1000, mcmc=20000, thin=1)

## First, the loadings (Lambda). ##

loadings.output <- summary(posterior_vdem_output)$statistics[1:3]
loadings.output
ci.loadings.output <- summary(posterior_vdem_output)$quantiles[1:3,c(1,5)]
ci.loadings.output

## Second, Uniqueness ##
uniquenesses.output <- data.frame(summary(posterior_vdem_output)$statistics[4:6])
names(uniquenesses.output)[1] <- "uniquenesses"
uniquenesses.output

## extract the point estimates from the Bayesian factor analysis

posterior_vdem_output <- MCMCfactanal(~v2peapsgen + v2peasjgen + v2peasbgen, factors=1,
                                     verbose=0, store.scores=TRUE, a0=1, b0=0.15,
                                     data=vdem, burnin=1000, mcmc=20000, thin=1)

loadings.output.df <- summary(posterior_vdem_output)$statistics

loadings.output.df <- loadings.output.df[7:4301,]

loadings.output.df <- as_tibble(loadings.output.df) %>%
  dplyr::select(Mean, SD) %>%
  rename(gender_inclusion_output_mean = Mean, 
         gender_inclusion_output_sd = SD)

vdem_output <- vdem %>%
  drop_na(v2peapsgen, v2peasjgen, v2peasbgen) %>%
  dplyr::select(country_name, year, cown, v2peapsgen, v2peasjgen, v2peasbgen, v2xpe_exlgender)

vdem_output <- vdem_output %>%
  bind_cols(loadings.output.df)

## Merge datasets ##

vdem_input <- vdem_input %>%
  dplyr::select(country_name, year, cown, gender_inclusion_input_mean, gender_inclusion_input_sd)

vdem_output<- vdem_output %>%
  dplyr::select(country_name, year, cown, gender_inclusion_output_mean, gender_inclusion_output_sd)

vdem <- vdem %>%
  left_join(vdem_input, by = c("country_name", "year", "cown"))

vdem <- vdem %>%
  left_join(vdem_output, by = c("country_name", "year", "cown"))

rm(posterior_vdem_input, posterior_vdem_output)

#### Regime Types Changes within countries ####

vdem_reg_change <- vdem %>%
  group_by(country_name) %>%
  mutate(reg_change = ifelse(gwf_type != lag(gwf_type), 1, 0)) %>%
  dplyr::select(country_name, year, gwf_type, reg_change)

table(vdem_reg_change$reg_change)


#### Generate Sample ####

vdem <- vdem %>%
  drop_na(v2xpe_exlgender)

vdem <- distinct(vdem, country_id, year, .keep_all= TRUE)

# delete those countries with less than 3 country-year observations to dela with singular fit 
vdem <- vdem %>% # delete those countries with less than 3 country-year observations to dela with singular fit 
  dplyr::group_by(cown) %>%
  mutate(num_years = max(year) - min(year)) %>%
  filter(num_years >= 5) %>%
  ungroup(cown)

#### Communist party regimes ####

table(vdem$v2exl_legitideolcr_1) # threshold > 0.8 

vdem <- vdem %>%
  mutate(gwf_type = ifelse(v2exl_legitideolcr_1 >= 0.8 & gwf_type == "party", "communist party", 
                           ifelse(v2exl_legitideolcr_1 < 0.8 & gwf_type == "party", "party", gwf_type)))

table(vdem$gwf_type)

#### Regime Types by Geddes et al and mulitparty competition ####

vdem <- vdem %>%
  group_by(cown) %>%
  fill(v2elmulpar_osp)
summary(vdem$v2elmulpar_osp) # compare L�hrmann et al. 2018 

vdem <- vdem %>%
  mutate(v2elmulpar_osp_bi = case_when(v2elmulpar_osp <= 1 ~ 0, 
                                       v2elmulpar_osp > 1 ~ 1, 
                                       is.na(v2elmulpar_osp) ~ 0))
table(vdem$v2elmulpar_osp_bi) # compare L�hrmann et al. 2018 

vdem <- vdem %>%
  mutate(gwf_type_mp = case_when(v2elmulpar_osp_bi == 1 & gwf_type == "party" ~ "party regime with ME", 
                                 v2elmulpar_osp_bi == 0 & gwf_type == "party" ~ "party regime", 
                                 v2elmulpar_osp_bi == 1 & gwf_type == "monarchy" ~ "monarchy with ME", 
                                 v2elmulpar_osp_bi == 0 & gwf_type == "monarchy" ~ "monarchy", 
                                 v2elmulpar_osp_bi == 1 & gwf_type == "personal" ~ "personal regime with ME", 
                                 v2elmulpar_osp_bi == 0 & gwf_type == "personal" ~ "personal regime", 
                                 v2elmulpar_osp_bi == 1 & gwf_type == "military" ~ "military regime with ME", 
                                 v2elmulpar_osp_bi == 0 & gwf_type == "military" ~ "military regime",
                                 v2elmulpar_osp_bi == 1 & gwf_type == "communist party" ~ "communist party with ME",
                                 v2elmulpar_osp_bi == 0 & gwf_type == "communist party" ~ "communist party")) %>%
  drop_na(gwf_type_mp)

table(vdem$gwf_type_mp)

vdem <- vdem %>%
  ungroup()

saveRDS(vdem, file = "calculations/data/vdem_merged.rds")
